import numpy as np
import torch
import matplotlib.pyplot as plt
import IPython.display as ipd

# Set parameters of the neural network.  The hidden layer sizes are
# given directly in the torch.nn.Sequential construction below, and
# torch initializes the weights itself, so no manual weight setup
# is needed here.
learning_rate = 0.01
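# X, T, Xtest, and Ttest are assumed to be numpy arrays created in an
# earlier cell.  Purely as an illustrative sketch (this target function
# is an assumption, not the original data):
# n = 40
# X = np.linspace(-10, 10, n).reshape(-1, 1)
# T = np.sin(X) + 0.1 * np.random.normal(size=(n, 1))
# Xtest = np.linspace(-10, 10, 2 * n).reshape(-1, 1)
# Ttest = np.sin(Xtest)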
# Convert the numpy arrays to float32 tensors for torch.
Xt = torch.from_numpy(X).float()
Tt = torch.from_numpy(T).float()
Xtestt = torch.from_numpy(Xtest).float()
Ttestt = torch.from_numpy(Ttest).float()
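# Note: torch.from_numpy shares memory with the numpy array; .float()
# then makes a float32 copy, which is the dtype torch.nn.Linear expects.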
# No constant column of 1's needs to be added here: each
# torch.nn.Linear layer includes its own bias term.
# Take nSteps gradient descent steps to minimize the mean squared error.
nSteps = 100000

# Collect training and testing RMSE at each step for plotting.
errorTrace = np.zeros((nSteps, 2))
# Network of three tanh hidden layers with 10, 20, and 10 units,
# mapping one input to one output.
nnet = torch.nn.Sequential(torch.nn.Linear(1, 10), torch.nn.Tanh(),
                           torch.nn.Linear(10, 20), torch.nn.Tanh(),
                           torch.nn.Linear(20, 10), torch.nn.Tanh(),
                           torch.nn.Linear(10, 1))
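# A quick sanity check of the model size (461 weights and biases:
# 20 + 220 + 210 + 11 across the four Linear layers):
# print(sum(p.numel() for p in nnet.parameters()))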
mse_f = torch.nn.MSELoss()
optimizer = torch.optim.SGD(nnet.parameters(), lr=learning_rate)
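# Plain SGD can need many steps on a problem like this.  As an
# alternative sketch (an assumption, not what this notebook uses),
# an adaptive optimizer could be substituted:
# optimizer = torch.optim.Adam(nnet.parameters(), lr=learning_rate)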
fig = plt.figure(figsize=(10, 12))
def forward_all_layers(X):
    # Apply each layer of nnet in turn, collecting every layer's output
    # so the hidden unit activations can be plotted.
    Ys = [X]
    for layer in nnet:
        Ys.append(layer(Ys[-1]))
    return Ys[1:]
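# With the seven layers in nnet, forward_all_layers(Xt) returns seven
# tensors; the Tanh outputs at indices 1, 3, and 5 are the hidden
# layer activations, and index 6 is the final output.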
for step in range(nSteps):

    # Forward pass on training data.
    Y = nnet(Xt)
    mse = mse_f(Y, Tt)

    # Backward pass: the backpropagation and weight update steps.
    optimizer.zero_grad()
    mse.backward()
    optimizer.step()

    # Record error traces for plotting.  .item() extracts a plain
    # Python float, detached from the autograd graph, so it can be
    # stored in the numpy array.
    errorTrace[step, 0] = mse.sqrt().item()
    with torch.no_grad():  # no gradients needed for the test set
        Ytest = nnet(Xtestt)
        mse_test = mse_f(Ytest, Ttestt)
    errorTrace[step, 1] = mse_test.sqrt().item()
    if step % 1000 == 0 or step == nSteps - 1:
        plt.clf()
        n_hidden_layers = (len(nnet) - 1) // 2
        nplots = 2 + n_hidden_layers

        # Plot the trace of the RMSE on training and testing data.
        plt.subplot(nplots, 1, 1)
        plt.plot(errorTrace[:step, :])
        plt.ylim(0, 0.7)
        plt.xlabel('Epochs')
        plt.ylabel('RMSE')
        plt.legend(('Train', 'Test'), loc='upper left')

        # Plot the training and testing data, and the output of our
        # neural network model on the test data.
        plt.subplot(nplots, 1, 2)
        plt.plot(X, T, 'o-', Xtest, Ttest, 'o-', Xtest, Ytest, 'o-')
        plt.xlim(-10, 10)
        plt.legend(('Training', 'Testing', 'Model'), loc='upper left')
        plt.xlabel('$x$')
        plt.ylabel('Actual and Predicted $f(x)$')

        # Plot each hidden layer's unit outputs, last hidden layer
        # first.  Tanh outputs sit at odd indices of the layer outputs.
        with torch.no_grad():
            Ys = forward_all_layers(Xt)
        Z = Ys[:-1]
        ploti = 2
        for layeri in range(n_hidden_layers, 0, -1):
            ploti += 1
            plt.subplot(nplots, 1, ploti)
            plt.plot(X, Z[layeri * 2 - 1])
            plt.ylim(-1.1, 1.1)
            plt.xlabel('$x$')
            plt.ylabel(f'Hidden Layer {layeri}')

        ipd.clear_output(wait=True)
        ipd.display(fig)

ipd.clear_output(wait=True)
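# A quick numeric check of the final fit (a minimal sketch appended
# here, not part of the original output):
with torch.no_grad():
    final_rmse_train = mse_f(nnet(Xt), Tt).sqrt().item()
    final_rmse_test = mse_f(nnet(Xtestt), Ttestt).sqrt().item()
print(f'Final RMSE: train {final_rmse_train:.3f}, test {final_rmse_test:.3f}')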